home *** CD-ROM | disk | FTP | other *** search
/* RCS identification keyword string; nothing in this file reads it. */
static char rcsid[] =
    "$Id: wildmat.c,v 1.1 1992/09/06 19:31:32 mike Exp $";
-
- /* $Log: wildmat.c,v $
- * Revision 1.1 1992/09/06 19:31:32 mike
- * Initial revision
- *
- */
-
- /*
- ** Do shell-style pattern matching for ?, \, [], and * characters.
- ** Might not be robust in face of malformed patterns; e.g., "foo[a-"
- ** could cause a segmentation violation. It is 8bit clean.
- **
- ** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
- ** Rich $alz is now <rsalz@bbn.com>.
- ** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
- ** This can greatly speed up failing wildcard patterns. For example:
- ** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
- ** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
- ** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
- ** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
- ** the ABORT, then it takes 22310 calls to fail. Ugh.
- **
- ** C Durland added:
- ** MS-DOS uses \ for path, so do nothing for \ in that case.
- ** Put man page at the end of this file.
- */
-
- #include "os.h" /* only used for MSDOZ, ATARI --CD */
-
/* Result codes shared by the matching routines below. */
#define TRUE            1
#define FALSE           0
/* The text ran out, or the pattern is malformed: no retry can succeed. */
#define ABORT           (-1)

/* Character that, directly after '[', inverts the sense of the class. */
#define NEGATE_CLASS    '^'

/* Forward declaration; Star() and DoMatch() call each other. */
static int DoMatch(const char *text, const char *p);

/*
**  See if the text matches p, which has an implied leading asterisk:
**  try p against text, then against each later suffix of text, until
**  DoMatch() reports something other than FALSE.  Returns TRUE or ABORT.
**  The loop ends at the latest when text is exhausted, because DoMatch()
**  never returns FALSE for an empty text and a non-empty pattern.
*/
static int
Star(const char *text, const char *p)
{
    int ret;

    do {
        ret = DoMatch(text++, p);
    } while (ret == FALSE);
    return ret;
}


/*
**  Test the single character c (already converted to unsigned char)
**  against the character class whose opening '[' is at *pp.
**
**  Returns TRUE if the class accepts c and FALSE if it rejects it; in both
**  cases *pp is left on the closing ']'.  Returns ABORT if the pattern
**  ends before a closing ']' is found, so a malformed class never matches
**  and is never read past its terminating NUL.
**
**  Bytes are compared as unsigned char so ranges behave the same whether
**  plain char is signed or unsigned.
*/
static int
MatchClass(int c, const char **pp)
{
    const unsigned char *p = (const unsigned char *)*pp;
    int reverse;
    int matched = FALSE;
    /* Larger than any byte, so a '-' that opens the class forms no range. */
    int last = 0400;

    reverse = (p[1] == NEGATE_CLASS) ? TRUE : FALSE;
    if (reverse)
        /* Inverted character class. */
        p++;

    while (*++p != '\0' && *p != ']') {
        if (*p == '-') {
            /* Range: the next character is its upper bound. */
            if (*++p == '\0')
                break;          /* "[a-" -- the class is never closed */
            if (c <= *p && c >= last)
                matched = TRUE;
        } else if (c == *p) {
            matched = TRUE;
        }
        last = *p;
    }

    *pp = (const char *)p;
    if (*p == '\0')
        return ABORT;
    return matched == reverse ? FALSE : TRUE;
}


/*
**  Match text and p, return TRUE, FALSE, or ABORT.
**
**  TRUE   the whole text matches the whole pattern.
**  FALSE  mismatch at this position; an enclosing Star() may retry
**         further along the text.
**  ABORT  the text ended while the pattern still needs a character, or
**         the pattern holds an unterminated character class; retrying
**         cannot help.
*/
static int
DoMatch(const char *text, const char *p)
{
    int verdict;

    for ( ; *p; text++, p++) {
        if (*text == '\0' && *p != '*')
            return ABORT;
        switch (*p) {
#if !(MSDOZ || ATARI) /* blame those bozos at Microsoft */
        case '\\':
            /*
            ** Literal match with following character.  A trailing
            ** backslash leaves p on the NUL, which cannot equal the
            ** (non-NUL) text character, so the match fails below.
            */
            p++;
            /* FALLTHROUGH */
#endif
        default:
            if (*text != *p)
                return FALSE;
            continue;
        case '?':
            /* Match anything. */
            continue;
        case '*':
            /* Trailing star matches everything. */
            return *++p ? Star(text, p) : TRUE;
        case '[':
            verdict = MatchClass((unsigned char)*text, &p);
            if (verdict != TRUE)
                return verdict;
            /* p now rests on ']'; the loop increment steps past it. */
            continue;
        }
    }

    return *text == '\0';
}


/*
**  User-level routine.  Returns TRUE if text matches the pattern p in its
**  entirety, FALSE otherwise (including for malformed patterns).
*/
int
wildmat(char *text, char *p)
{
    return DoMatch(text, p) == TRUE;
}
-
-
-
#ifdef TEST
#include <stdio.h>

/*
**  Read one line from standard input into buf, which holds size bytes,
**  and remove the trailing newline if one was stored.  Returns buf, or
**  NULL on end-of-file or read error.  A line longer than size - 1
**  characters is not truncated silently into memory beyond buf; the
**  remainder is simply returned by the following call(s).
*/
static char *
ReadLine(char *buf, int size)
{
    char *s;

    if (fgets(buf, size, stdin) == NULL)
        return NULL;
    for (s = buf; *s != '\0'; s++) {
        if (*s == '\n') {
            *s = '\0';
            break;
        }
    }
    return buf;
}


/*
**  Interactive tester: read a pattern, then report YES or NO for each
**  text line entered.  A blank text line asks for a new pattern; a blank
**  pattern or end-of-file ends the program.
*/
int
main(void)
{
    char pattern[80];
    char text[80];

    printf("Wildmat tester. Enter pattern, then strings to test.\n");
    printf("A blank line gets prompts for a new pattern; a blank pattern\n");
    printf("exits the program.\n\n");

    for ( ; ; ) {
        printf("Enter pattern: ");
        (void)fflush(stdout);
        /* The newline has been stripped, so a blank pattern is "". */
        if (ReadLine(pattern, (int)sizeof pattern) == NULL
         || pattern[0] == '\0')
            break;
        for ( ; ; ) {
            printf("Enter text: ");
            (void)fflush(stdout);
            if (ReadLine(text, (int)sizeof text) == NULL)
                return 0;
            if (text[0] == '\0')
                /* Blank line; go back and get a new pattern. */
                break;
            printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO");
        }
    }

    return 0;
}
#endif /* TEST */
-
-
-
- #if 0 /* the man page */
-
- WILDMAT(3) WILDMAT(3)
-
-
- NAME
- wildmat - perform shell-style wildcard matching
-
- SYNOPSIS
- int
- wildmat(text, pattern)
- char *text;
- char *pattern;
-
- DESCRIPTION
- Wildmat compares the text against the pattern and returns non-zero if
- the pattern matches the text. The pattern is interpreted similar to
- shell filename wildcards, and not as a full regular expression such as
- those handled by the grep(1) family of programs or the regex(3) or
- regexp(3) set of routines.
-
- The pattern is interpreted according to the following rules:
-
- \x Turns off the special meaning of x and matches it directly; this
- is used mostly before a question mark or asterisk, and is not
- valid inside square brackets.
-
- ? Matches any single character.
-
- * Matches any sequence of zero or more characters.
-
- [x...y]
- Matches any single character specified by the set x...y, where
- any character other than minus sign or close bracket may appear
- in the set. A minus sign may be used to indicate a range of
- characters. That is, [0-5abc] is a shorthand for [012345abc].
- More than one range may appear inside a character set; for example,
- [0-9a-zA-Z._] matches almost all of the legal characters for a host name.
-
- [^x...y]
- This matches any character not in the set x...y, which is
- interpreted as described above.
-
- BUGS
- There is no way to specify a minus sign in a character range.
-
- HISTORY
- Written by Rich $alz <rsalz@bbn.com> in 1986, and posted to Usenet
- several times since then, most notably in comp.sources.misc in March,
- 1991.
- Lars Mathiesen <thorinn@diku.dk> enhanced the multi-asterisk failure
- mode in early 1991.
-
- SEE ALSO
- grep(1), regex(3), regexp(3).
-
-
- - 1 - Formatted: January 17, 1992
-
- #endif
-